In [1]:
#import polars as pl
import pandas as pd
import numpy as np
import scipy
import scipy.stats
import math

import matplotlib.pyplot as plt
import plotly.graph_objects as go

import os
from glob import glob
In [2]:
# Root folder of the dataset and the two cohort sub-folders it contains.
DATA_PATH = "data"
CASES = [
    "healthy_subset_pictures-glucose-food",
    "diabetes_subset_pictures-glucose-food-insulin",
]
# strftime pattern of the timestamps, keyed by cohort folder name.
timestamp_formats = dict([
    ('healthy_subset_pictures-glucose-food', '%Y-%m-%d %H:%M'),
    ('diabetes_subset_pictures-glucose-food-insulin', '%Y-%m-%d %H:%M:%S'),
])
In [3]:
# Single seed for the notebook: it seeds NumPy's global RNG here and is also
# passed explicitly as `random_state` to the DataFrame.sample calls further down.
seed = 42
np.random.seed(seed=seed)
In [4]:
# Load every healthy patient's glucose log, draw one trace per patient, and
# split the patients into a pooled frame (df_compl_healthy) and one held-out
# patient (df_test_healthy).
fig = go.Figure()

case = "healthy_subset_pictures-glucose-food"

# NOTE(review): glob() yields files in an OS-dependent order, so which patient
# becomes the held-out one may differ between machines -- confirm whether the
# file list should be sorted.
pooled_frames = []
for file_idx, patient_glucose in enumerate(glob(f"{DATA_PATH}/{case}/*/glucose.csv")):
    _df = pd.read_csv(patient_glucose)
    # Only the first five characters of the time string are kept before parsing.
    _df.loc[:, 'timestamp'] = pd.to_datetime(_df.loc[:, 'date'] + " " + _df.loc[:, 'time'].str[:5])
    _df = _df.loc[:, ['timestamp', 'glucose']].set_index('timestamp')

    if file_idx == 1:
        # The second file read is kept aside as the test patient.
        df_test_healthy = _df.copy()
    else:
        pooled_frames.append(_df)

    # Folder name of the patient; os.path handles both "/" and "\" separators.
    patient_id = os.path.basename(os.path.dirname(patient_glucose))
    fig.add_trace(go.Scatter(x=_df.index,
                             y=_df.iloc[:, 0],
                             name=f"{case.split('_')[0]} {patient_id}"))

# DataFrame.append was removed in pandas 2.0; one concat over the collected
# frames gives the same rows in the same order without repeated copying.
df_compl_healthy = pd.concat(pooled_frames)

fig.update_layout(title_text = "Healthy patients monitoring")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Glucose level (MMol/l)")
fig.show()
In [5]:
# Load every diabetic patient's glucose log, draw one trace per patient, and
# split the patients into a pooled frame (df_compl_diab) and one held-out
# patient (df_test_diab).
fig = go.Figure()

case = "diabetes_subset_pictures-glucose-food-insulin"

# NOTE(review): glob() yields files in an OS-dependent order, so which patient
# becomes the held-out one may differ between machines -- confirm whether the
# file list should be sorted.
pooled_frames = []
for file_idx, patient_glucose in enumerate(glob(f"{DATA_PATH}/{case}/*/glucose.csv")):
    _df = pd.read_csv(patient_glucose)
    # Only the first five characters of the time string are kept before parsing.
    _df.loc[:, 'timestamp'] = pd.to_datetime(_df.loc[:, 'date'] + " " + _df.loc[:, 'time'].str[:5])
    _df = _df.loc[:, ['timestamp', 'glucose']].set_index('timestamp')

    if file_idx == 1:
        # The second file read is kept aside as the test patient.
        df_test_diab = _df.copy()
    else:
        pooled_frames.append(_df)

    if file_idx == 0:
        # Snapshot of the first patient's frame (what the original j-counter captured).
        _df2 = _df.copy()

    # Folder name of the patient; os.path handles both "/" and "\" separators.
    patient_id = os.path.basename(os.path.dirname(patient_glucose))
    fig.add_trace(go.Scatter(x=_df.index,
                             y=_df.iloc[:, 0],
                             name=f"{case.split('_')[0]} {patient_id}"))

# DataFrame.append was removed in pandas 2.0; one concat over the collected
# frames gives the same rows in the same order without repeated copying.
df_compl_diab = pd.concat(pooled_frames)

fig.update_layout(title_text = "Diabetic patients monitoring")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Glucose level (MMol/l)")
fig.show()
In [ ]:
 
In [6]:
# Pooled readings of every healthy patient except the held-out one.
df_compl_healthy
Out[6]:
glucose
timestamp
2014-10-01 11:30:00 5.2
2014-10-01 14:00:00 4.9
2014-10-01 23:13:00 5.5
2014-10-02 08:18:00 5.7
2014-10-02 10:20:00 4.9
... ...
2014-10-03 22:00:00 5.4
2014-10-03 09:00:00 5.4
2014-10-04 11:00:00 5.3
2014-10-04 12:00:00 4.6
2014-10-04 14:30:00 5.1

446 rows × 1 columns

In [7]:
# Pooled readings of every diabetic patient except the held-out one.
df_compl_diab
Out[7]:
glucose
timestamp
2014-10-01 11:30:00 9.4
2014-10-01 11:35:00 9.2
2014-10-01 11:40:00 8.3
2014-10-01 11:40:00 9.0
2014-10-01 11:45:00 8.7
... ...
2014-10-01 23:43:00 8.7
2014-10-01 23:48:00 8.5
2014-10-01 23:53:00 8.4
2014-10-01 23:58:00 8.3
2014-10-02 00:03:00 8.2

7237 rows × 1 columns

In [8]:
# Readings of the single healthy patient kept aside by the loading loop.
df_test_healthy
Out[8]:
glucose
timestamp
2014-10-01 07:57:00 5.4
2014-10-01 09:59:00 5.8
2014-10-01 12:10:00 5.5
2014-10-01 14:30:00 5.3
2014-10-01 19:00:00 4.6
2014-10-01 21:20:00 6.5
2014-10-02 07:32:00 5.6
2014-10-02 09:40:00 5.9
2014-10-02 12:10:00 5.4
2014-10-02 15:00:00 6.5
2014-10-02 20:00:00 4.8
2014-10-02 21:55:00 5.9
2014-10-03 07:00:00 5.2
2014-10-03 09:10:00 5.6
2014-10-03 12:10:00 5.7
2014-10-03 14:05:00 5.6
2014-10-03 19:55:00 5.9
2014-10-03 20:56:00 5.2
2014-10-04 08:10:00 5.4
2014-10-04 10:30:00 5.9
2014-10-04 12:05:00 6.1
2014-10-04 15:05:00 5.3
2014-10-04 18:40:00 4.9
2014-10-04 20:40:00 7.2
In [9]:
# Readings of the single diabetic patient kept aside by the loading loop.
df_test_diab
Out[9]:
glucose
timestamp
2014-10-01 08:24:00 11.8
2014-10-01 08:29:00 12.7
2014-10-01 08:30:00 13.0
2014-10-01 08:34:00 13.3
2014-10-01 08:39:00 13.8
... ...
2014-10-04 16:44:00 15.7
2014-10-04 16:49:00 16.3
2014-10-04 16:54:00 16.9
2014-10-04 16:59:00 17.5
2014-10-04 17:04:00 18.0

984 rows × 1 columns

In [10]:
# Balanced training set: all pooled healthy readings plus an equally sized
# random sample of pooled diabetic readings (446 rows each in the recorded run).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_compl_eq = pd.concat([
    df_compl_healthy,
    df_compl_diab.sample(frac=1, random_state=seed).iloc[:len(df_compl_healthy)],
])
In [11]:
# Healthy pooled readings followed by the sampled diabetic readings.
df_compl_eq
Out[11]:
glucose
timestamp
2014-10-01 11:30:00 5.2
2014-10-01 14:00:00 4.9
2014-10-01 23:13:00 5.5
2014-10-02 08:18:00 5.7
2014-10-02 10:20:00 4.9
... ...
2014-10-03 02:50:00 8.9
2014-10-04 06:21:00 3.8
2014-10-05 12:00:00 9.7
2014-10-04 18:09:00 9.3
2014-09-30 14:01:00 3.8

892 rows × 1 columns

In [12]:
# Prevalence-weighted training set: all pooled healthy readings plus 48
# randomly sampled diabetic readings.
# NOTE(review): 48 presumably mirrors real-world diabetes prevalence
# (48 of 494 rows is about 9.7%) -- confirm the intended ratio.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_compl_eq_perc = pd.concat([
    df_compl_healthy,
    df_compl_diab.sample(frac=1, random_state=seed).iloc[:48],
])
In [13]:
# Healthy pooled readings followed by the 48 sampled diabetic readings.
df_compl_eq_perc
Out[13]:
glucose
timestamp
2014-10-01 11:30:00 5.2
2014-10-01 14:00:00 4.9
2014-10-01 23:13:00 5.5
2014-10-02 08:18:00 5.7
2014-10-02 10:20:00 4.9
... ...
2014-10-04 02:47:00 17.7
2014-09-30 17:46:00 8.9
2014-10-05 06:45:00 16.8
2014-10-04 04:55:00 5.6
2014-10-02 08:25:00 13.4

494 rows × 1 columns

In [14]:
#define delta function, used for learning and testing
In [15]:
def calculate_delta(curve, dataset, params=None, n_bins=25):
    """Mean absolute gap between a sample's unit-bin histogram and a pdf.

    Every sample is truncated to an integer bin index with ``int(x)``. The
    fraction of samples in bin ``k`` is compared with ``curve.pdf(k, ...)``
    (the density at the bin's left edge) and the absolute differences are
    averaged over all ``n_bins`` bins.

    Parameters
    ----------
    curve : object exposing ``pdf(x, *shape, loc=..., scale=...)``
        For example a ``scipy.stats`` continuous distribution.
    dataset : sequence of numbers
        Samples; each must truncate to a bin index in ``[0, n_bins)``.
    params : sequence, optional
        ``(*shape, loc, scale)`` in the order returned by ``curve.fit``.
        When omitted, the notebook-level globals ``arg``, ``loc`` and
        ``scale`` are used (the original behaviour), so the result then
        depends on which fitting cell ran last.
    n_bins : int, optional
        Number of unit-width bins, 25 by default.

    Returns
    -------
    float
        Mean of the per-bin absolute differences.

    Raises
    ------
    ValueError
        If ``dataset`` is empty.
    IndexError
        If a sample falls outside ``[0, n_bins)``. Negative values used to
        wrap around silently into the last bins.
    NameError
        If ``params`` is omitted and the fallback globals are not defined.
    """
    if params is None:
        # Backward-compatible path: read the most recent fit from the notebook.
        shape_args, location, spread = arg, loc, scale
    else:
        *shape_args, location, spread = params

    n_samples = len(dataset)
    if n_samples == 0:
        raise ValueError("dataset is empty")

    histogram = np.zeros(n_bins)
    for x in dataset:
        bin_index = int(x)
        if not 0 <= bin_index < n_bins:
            raise IndexError(
                f"value {x!r} falls outside the histogram range [0, {n_bins})"
            )
        histogram[bin_index] += 1

    # One vectorised pdf call over all bin edges; abs() replaces sqrt(x**2).
    expected = curve.pdf(np.arange(n_bins), *shape_args, loc=location, scale=spread)
    distances = np.abs(histogram / n_samples - expected)

    return np.mean(distances)

General Model equilibrated on the actual percentage of people with diabetes¶

In [16]:
# Glucose readings of the prevalence-weighted set as floats; rows holding the
# literal string "7:0" (a data error) are skipped.
a = np.array(
    [float(reading) for reading in df_compl_eq_perc['glucose'] if not (reading == "7:0")],
    dtype=float,
)
In [17]:
# Fit the generic Rayleigh model on `a` and plot it over the data histogram.
y = a
size = len(y)
h = plt.hist(y, bins=range(25))

dist_name = 'rayleigh'
dist = getattr(scipy.stats, dist_name)
# fit() returns (*shape, loc, scale); Rayleigh has no shape parameters, so
# `arg` is an empty tuple here.
params = dist.fit(y)
arg = params[:-2]
loc = params[-2]
scale = params[-1]

# Evaluate the density only over the plotted range. Multiplying by the sample
# count puts it on the histogram's count scale (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

# Central interval holding 95% of the fitted distribution; later cells reuse
# min_int / max_int. Shape args are passed so other distributions also work.
min_int, max_int = dist.interval(.95, *arg, loc=loc, scale=scale)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Generic model')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.show()
In [18]:
# Held-out diabetic patient's readings as floats, skipping "7:0" rows.
a_test_diab = np.array(
    [float(reading) for reading in df_test_diab['glucose'] if not (reading == "7:0")],
    dtype=float,
)
In [19]:
# Overlay the already-fitted model on the diabetic patient's histogram.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = a_test_diab
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Generic model on diabetic patient')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('Generic_model_diab_patient.png')
plt.show()
In [20]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the generic one when run top to bottom).
print("test value for diabetic patient:", calculate_delta(dist, a_test_diab), sep="\n")
test value for diabetic patient:
0.04361268989899099
In [21]:
# Held-out healthy patient's readings as floats, skipping "7:0" rows.
a_test_healthy = np.array(
    [float(reading) for reading in df_test_healthy['glucose'] if not (reading == "7:0")],
    dtype=float,
)
In [22]:
# Overlay the already-fitted model on the healthy patient's histogram.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = a_test_healthy
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the full plotted range (not just len(y) points), scaled to
# counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Generic model on healthy patient')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('Generic_model_healthy_patient.png')
plt.show()
In [23]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the generic one when run top to bottom).
print("test value for healthy patient:", calculate_delta(dist, a_test_healthy), sep="\n")
test value for healthy patient:
0.04305259861429515

soft personalization¶

define test and validation dataset¶

the validation/test split is 50%

In [24]:
# All readings of the held-out healthy patient as floats, skipping "7:0" rows.
a_test_healty = np.array(
    [float(reading) for reading in df_test_healthy['glucose'] if not (reading == "7:0")],
    dtype=float,
)

# First 12 readings form the test set, the remainder the validation set.
a_test_healthy, a_valid_healthy = a_test_healty[:12], a_test_healty[12:]

len(a_test_healthy)
Out[24]:
12
In [25]:
# All readings of the held-out diabetic patient as floats, skipping "7:0" rows.
a_test_diab = np.array(
    [float(reading) for reading in df_test_diab['glucose'] if not (reading == "7:0")],
    dtype=float,
)

# First 492 readings form the validation set, the remainder the test set.
a_valid_diab, a_test_diab = a_test_diab[:492], a_test_diab[492:]

len(a_test_diab)
Out[25]:
492
In [ ]:
 
In [ ]:
 
In [26]:
# Histogram of the healthy patient's test readings in unit-width bins.
y = a_test_healthy
h = plt.hist(y, bins=range(25))

plt.xlabel('Glucose level (MMol/l)')
plt.title('Healthy patient data')
plt.savefig("healthy_patient_data.png")
plt.show()
In [27]:
# Histogram of the diabetic patient's test readings in unit-width bins.
y = a_test_diab
h = plt.hist(y, bins=range(25))

plt.xlabel('Glucose level (MMol/l)')
plt.title('Diabetic patient data')
plt.show()

Model for healthy people¶

In [28]:
# Pooled healthy readings as floats; rows holding the literal string "7:0"
# (a data error) are skipped.
a = np.array(
    [float(reading) for reading in df_compl_healthy['glucose'] if not (reading == "7:0")],
    dtype=float,
)
In [29]:
# Fit the healthy-population Rayleigh model on `a` and plot it over the data.
y = a
size = len(y)
h = plt.hist(y, bins=range(25))

dist_name = 'rayleigh'
dist = getattr(scipy.stats, dist_name)
# fit() returns (*shape, loc, scale); Rayleigh has no shape parameters, so
# `arg` is an empty tuple here.
params = dist.fit(y)
arg = params[:-2]
loc = params[-2]
scale = params[-1]

# Evaluate the density only over the plotted range. Multiplying by the sample
# count puts it on the histogram's count scale (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

# Central interval holding 95% of the fitted distribution; later cells reuse
# min_int / max_int. Shape args are passed so other distributions also work.
min_int, max_int = dist.interval(.95, *arg, loc=loc, scale=scale)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Healthy model')
plt.xlabel('Glucose level (MMol/l)')
plt.legend(loc='upper right')
plt.show()

Test model on diabetic patient¶

In [30]:
# Overlay the already-fitted model on the diabetic patient's test histogram.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = a_test_diab
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Healthy model on diabetic patient')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.show()
In [31]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the healthy one when run top to bottom).
print("test value for diabetic patient:", calculate_delta(dist, a_test_diab), sep="\n")
test value for diabetic patient:
0.05173044876034623

Test model on healthy patient¶

In [32]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the healthy one when run top to bottom).
print("validation value for healthy patient:", calculate_delta(dist, a_valid_healthy), sep="\n")
validation value for healthy patient:
0.042082139526341995
In [33]:
# Overlay the already-fitted model on the healthy patient's validation
# histogram. Nothing is fitted here: the curve uses the globals
# dist/arg/loc/scale and the interval lines reuse min_int/max_int from the most
# recent fitting cell.
y = a_valid_healthy
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

# The title used to read 'diabetes model', contradicting the section and the
# output file name.
plt.title('Healthy model on healthy patient (validation)')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('healthy_model_healthy_patient_val.png')
plt.show()
In [34]:
# Overlay the already-fitted model on the healthy patient's test histogram.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = a_test_healthy
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Healthy model on healthy patient')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('healthy_model_healthy_patient.png')
plt.show()
In [35]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the healthy one when run top to bottom).
print("test value for healthy patient:", calculate_delta(dist, a_test_healthy), sep="\n")
test value for healthy patient:
0.038064009234556224

Model for diabetic people¶

In [36]:
# Pooled diabetic readings as floats, skipping rows holding the string "7:0".
a_diab = np.array(
    [float(reading) for reading in df_compl_diab['glucose'] if not (reading == "7:0")],
    dtype=float,
)
In [37]:
# Fit the diabetic-population Rayleigh model on `a_diab` and plot it over the data.
y = a_diab
size = len(y)
h = plt.hist(y, bins=range(25))

dist_name = 'rayleigh'
dist = getattr(scipy.stats, dist_name)
# fit() returns (*shape, loc, scale); Rayleigh has no shape parameters, so
# `arg` is an empty tuple here.
params = dist.fit(y)
arg = params[:-2]
loc = params[-2]
scale = params[-1]

# Evaluate the density only over the plotted range (instead of one point per
# sample). Multiplying by the sample count puts it on the histogram's count
# scale (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

# Central interval holding 95% of the fitted distribution; later cells reuse
# min_int / max_int. Shape args are passed so other distributions also work.
min_int, max_int = dist.interval(.95, *arg, loc=loc, scale=scale)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('diabetes model')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.show()

Test model on diabetic patient¶

In [38]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the diabetes one when run top to bottom).
print("validation value for diabetic patient", calculate_delta(dist, a_valid_diab), sep="\n")
validation value for diabetic patient
0.016343981624997322
In [39]:
# Overlay the already-fitted model on the diabetic patient's test histogram.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = a_test_diab
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('diabetes model on diabetic patient')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('diab_model_diab_patient.png')
plt.show()
In [40]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the diabetes one when run top to bottom).
print("test value for diabetic patient", calculate_delta(dist, a_test_diab), sep="\n")
test value for diabetic patient
0.021513258325668316

Test model on healthy patient¶

In [41]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the diabetes one when run top to bottom).
print("validation value for healthy patient", calculate_delta(dist, a_valid_healthy), sep="\n")
validation value for healthy patient
0.054828053528473565
In [42]:
# Overlay the already-fitted model on the healthy patient's validation
# histogram. Nothing is fitted here: the curve uses the globals
# dist/arg/loc/scale and the interval lines reuse min_int/max_int from the most
# recent fitting cell.
y = a_valid_healthy
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

# The title used to read 'healthy model', contradicting the section and the
# output file name.
plt.title('diabetes model on healthy patient (validation)')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('diab_model_healthy_patient_val.png')
plt.show()
In [43]:
# Overlay the already-fitted model on the healthy patient's test histogram.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = a_test_healthy
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

# The title names the data too, so this figure is distinguishable from the
# model-fit plot that is also titled 'diabetes model'.
plt.title('diabetes model on healthy patient')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('diab_model_healthy_patient.png')
plt.show()
In [44]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the diabetes one when run top to bottom).
print("test value for healthy patient", calculate_delta(dist, a_test_healthy), sep="\n")
test value for healthy patient
0.06057173197925015
In [ ]:
 

Full personalization¶

In [45]:
# Held-out healthy patient's readings as floats, skipping "7:0" rows.
d = np.array(
    [float(reading) for reading in df_test_healthy['glucose'] if not (reading == "7:0")],
    dtype=float,
)

# First 16 readings train the personalized model, the remainder tests it.
d_test, d = d[16:], d[:16]
len(d)
Out[45]:
16
In [46]:
# Fit a personalized Rayleigh model on the patient's own training readings `d`.
y = d
size = len(y)
h = plt.hist(y, bins=range(25))

dist_name = 'rayleigh'
dist1 = getattr(scipy.stats, dist_name)
# fit() returns (*shape, loc, scale); Rayleigh has no shape parameters, so
# `arg` is an empty tuple here. Later cells read arg/loc/scale as globals.
params = dist1.fit(y)
arg = params[:-2]
loc = params[-2]
scale = params[-1]

# Plot with the distribution that was actually fitted (dist1), over the whole
# plotted range. arange(len(d)) used to cut the curve off at x = 15.
x = np.arange(26)
pdf_fitted = dist1.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

# Central interval holding 95% of the fitted distribution; later cells reuse
# min_int / max_int.
min_int, max_int = dist1.interval(.95, *arg, loc=loc, scale=scale)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Healthy personalized model')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.show()
In [47]:
# Overlay the already-fitted model on the patient's held-back readings.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = d_test
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Personalized model on healthy patient data')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('Pers_model_healthy_patient.png')
plt.show()
In [48]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the healthy personalized one when run top
# to bottom).
print("test value for healthy patient:", calculate_delta(dist, d_test), sep="\n")
test value for healthy patient:
0.021620385472515134
In [ ]:
 
In [49]:
# Held-out diabetic patient's readings as floats, skipping "7:0" rows.
d = np.array(
    [float(reading) for reading in df_test_diab['glucose'] if not (reading == "7:0")],
    dtype=float,
)

# First 328 readings test the personalized model, the remainder trains it.
d_test, d = d[:328], d[328:]

len(d)
Out[49]:
656
In [50]:
# Fit a personalized Rayleigh model on the patient's own training readings `d`.
y = d
size = len(y)
h = plt.hist(y, bins=range(25))

dist_name = 'rayleigh'
dist1 = getattr(scipy.stats, dist_name)
# fit() returns (*shape, loc, scale); Rayleigh has no shape parameters, so
# `arg` is an empty tuple here. Later cells read arg/loc/scale as globals.
params = dist1.fit(y)
arg = params[:-2]
loc = params[-2]
scale = params[-1]

# Plot with the distribution that was actually fitted (dist1), evaluated only
# over the plotted range and scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist1.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

# Central interval holding 95% of the fitted distribution; later cells reuse
# min_int / max_int.
min_int, max_int = dist1.interval(.95, *arg, loc=loc, scale=scale)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Diabetic personalized model')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.show()
In [51]:
# Overlay the already-fitted model on the patient's held-back readings.
# Nothing is fitted here: the curve uses the globals dist/arg/loc/scale and the
# interval lines reuse min_int/max_int from the most recent fitting cell.
y = d_test
size = len(y)
h = plt.hist(y, bins=range(25))

# Density over the plotted range, scaled to counts (bins are 1 wide).
x = np.arange(26)
pdf_fitted = dist.pdf(x, *arg, loc=loc, scale=scale) * size

plt.plot(x, pdf_fitted, label="Rayleigh model")
plt.xlim(0,25)

plt.axvline(x = min_int, color = 'r')
plt.axvline(x = max_int, color = 'r', label = '.95 confidence interval')

plt.title('Personalized model on diabetic patient data')
plt.legend(loc='upper right')
plt.xlabel('Glucose level (MMol/l)')
plt.savefig('Pers_model_diab_patient.png')
plt.show()
In [52]:
# calculate_delta reads the notebook globals arg/loc/scale, so this scores
# whichever model was fitted last (the diabetic personalized one when run top
# to bottom).
print("test value for diabetic patient:", calculate_delta(dist, d_test), sep="\n")
test value for diabetic patient:
0.016658116720516394
In [ ]: